################################################################################
# Created By: Pietryka
# Creation Date:  2016-09-09
# Purpose: impute missing values in the CCES Panel Data
# Questions: mpietryka@fsu.edu
################################################################################


# PREAMBLE =============================================



# LOAD PACKAGES -----------------
library(Amelia)
library(tidyr)
library(dplyr)
library(rio)


# LOAD DATA -----------------

# Bring the 'cces_subset' object (created in 'CCES1-Clean-Vars.R') into the
# workspace.
load(file = "Data/cces_subset.RData")

# Build the sample that is handed to the imputation step:
#   * missing_dv flags (1/0) rows whose DV, turnout, is missing. These rows are
#     NOT dropped here; the flag is only added as a column.
#   * rows from 2012 are removed
#   * rows with parent == 1 in 2010 are removed
#   * ownhome, child18, birthyr and gender are dropped
cces_subset <- cces_subset %>%
  mutate(missing_dv = as.numeric(is.na(turnout))) %>%
  filter(
    year != 2012,
    parent != 1 | year != 2010
  ) %>%
  select(-ownhome, -child18, -birthyr, -gender)


# Impute  =============================================

# Imputation settings
m_imps <- 20                                                 # number of imputed data sets
nominal_vars <- c("turnout", "parent", "employ", "marstat")  # passed to `noms`

# Data handed to Amelia
imp_in <- cces_subset

# Fix the RNG state so the imputations are reproducible
set.seed(7866)
cces_imps <- amelia(
  x = imp_in,
  m = m_imps,
  p2s = 2,
  # columns listed as identification variables
  idvars = c("morethanone", "missing_dv"),
  # panel structure: time index and cross-section (respondent) index
  ts = "year",
  cs = "caseid",
  # lag and lead of family income
  lags = "faminc2",
  leads = "faminc2",
  noms = nominal_vars,
  # ridge prior set to 1% of the number of rows
  empri = 0.01 * nrow(imp_in)
)



# CLEAN IMPUTED DATA  =============================================

# Run the same post-processing on every imputed data set; the result is a list
# with one cleaned data frame per imputation.
cces_cleaned <- lapply(cces_imps$imputations, function(imp) {
  imp %>%
    # Keep only rows whose turnout (the DV) was observed in the data given to
    # Amelia (per the missingness matrix), and exclude 2012.
    filter(
      !cces_imps$missMatrix[, "turnout"],
      year != 2012
    ) %>%
    mutate(
      # 0/1 indicators; a comparison that evaluates to NA is coded 0
      married = coalesce(as.numeric(marstat == 1), 0),
      collgrad = coalesce(as.numeric(educ == 5 | educ == 6), 0),
      employfull = coalesce(as.numeric(employ == 1), 0),
      student = coalesce(as.numeric(employ == 8), 0),
      # family income split into thirds and quarters
      famincthirds = ntile(faminc2, 3),
      famincquart = ntile(faminc2, 4),
      # countymove, forced to 0 in 2010 and wherever year is missing
      countymove_change = if_else(
        year == 2010,
        0,
        countymove,
        missing = 0
      )
    )
})

# SAVE ======================================================

# Store the current workspace (all objects created above) on disk
save.image(file = "Data/CCES2-Impute.Rdata")

